{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Using TensorFlow backend.\n"
     ]
    }
   ],
   "source": [
    "import os\n",
    "\n",
    "# Select the GPU before keras/tensorflow are imported: the variable is only\n",
    "# honoured if it is already set when the CUDA runtime gets initialised.\n",
    "os.environ['CUDA_VISIBLE_DEVICES'] = '1'\n",
    "\n",
    "import numpy as np\n",
    "import h5py\n",
    "import scipy.io\n",
    "\n",
    "# Seeds NumPy's global RNG only; no TensorFlow-level seed is set in this notebook.\n",
    "np.random.seed(1337) # for reproducibility\n",
    "\n",
    "import keras\n",
    "import tensorflow as tf\n",
    "from keras.preprocessing import sequence\n",
    "from keras.optimizers import RMSprop\n",
    "from keras.models import Sequential\n",
    "from keras.layers.core import Dense, Dropout, Activation, Flatten\n",
    "from keras.layers.convolutional import Convolution1D, MaxPooling1D\n",
    "from keras.regularizers import l2\n",
    "from keras.constraints import maxnorm\n",
    "from keras.layers.recurrent import LSTM, GRU\n",
    "from keras.callbacks import ModelCheckpoint, EarlyStopping\n",
    "from keras.layers import Bidirectional"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "loading data\n"
     ]
    }
   ],
   "source": [
    "# print() with a single argument behaves the same on Python 2 and Python 3.\n",
    "print('loading data')\n",
    "\n",
    "# The training file is read through h5py (presumably a MATLAB v7.3 / HDF5 file --\n",
    "# TODO confirm), whereas valid/test are read with scipy.io.loadmat.\n",
    "# Open it read-only and release the handle once the arrays are in memory.\n",
    "with h5py.File('deepsea_data/train.mat', 'r') as trainmat:\n",
    "    # np.array(...) copies each dataset out of the file, so the arrays stay\n",
    "    # usable after the file is closed.\n",
    "    # axes=(2,0,1) moves the last stored axis to the front; the result is fed to\n",
    "    # a model declared with input_shape=(1000, 4).\n",
    "    X_train = np.transpose(np.array(trainmat['trainxdata']), axes=(2, 0, 1))\n",
    "    y_train = np.array(trainmat['traindata']).T\n",
    "\n",
    "validmat = scipy.io.loadmat('deepsea_data/valid.mat')\n",
    "testmat = scipy.io.loadmat('deepsea_data/test.mat')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# A single LSTM wrapped in Bidirectional; the wrapper takes care of the\n",
    "# backward pass, so no separate backward layer is defined.\n",
    "forward_lstm = LSTM(units=320, return_sequences=True)\n",
    "brnn = Bidirectional(forward_lstm)\n",
    "\n",
    "print('building model')\n",
    "\n",
    "model = Sequential()\n",
    "\n",
    "# 320 width-26 filters over the (1000, 4) input, no padding.\n",
    "model.add(Convolution1D(filters=320, kernel_size=26, strides=1,\n",
    "                        padding='valid', activation='relu',\n",
    "                        input_shape=(1000, 4)))\n",
    "model.add(MaxPooling1D(pool_size=13, strides=13))\n",
    "model.add(Dropout(0.2))\n",
    "\n",
    "model.add(brnn)\n",
    "model.add(Dropout(0.5))\n",
    "\n",
    "# Flatten yields 75*640 features per sample. Only the first layer of a\n",
    "# Sequential model needs an explicit input size, so the Dense layers below\n",
    "# take theirs from the preceding layer instead of a hard-coded input_dim.\n",
    "model.add(Flatten())\n",
    "\n",
    "model.add(Dense(units=925))\n",
    "model.add(Activation('relu'))\n",
    "\n",
    "# 919 sigmoid outputs trained with binary cross-entropy.\n",
    "model.add(Dense(units=919))\n",
    "model.add(Activation('sigmoid'))\n",
    "\n",
    "print('compiling model')\n",
    "# NOTE(review): 'cosine' is kept so evaluate() still returns [loss, cosine];\n",
    "# it says little about classification quality -- consider reporting AUC.\n",
    "model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['cosine'])\n",
    "\n",
    "print('running at most 60 epochs')\n",
    "\n",
    "# Keep the best model seen so far on disk and stop once val_loss has not\n",
    "# improved for 5 consecutive epochs.\n",
    "checkpointer = ModelCheckpoint(filepath='DanQ_bestmodel.hdf5', verbose=1, save_best_only=True)\n",
    "earlystopper = EarlyStopping(monitor='val_loss', patience=5, verbose=1)\n",
    "\n",
    "# Swap the last two axes of the validation inputs before they are fed to the model.\n",
    "X_valid = np.transpose(validmat['validxdata'], axes=(0, 2, 1))\n",
    "y_valid = validmat['validdata']\n",
    "\n",
    "model.fit(X_train, y_train, batch_size=2048, epochs=60,\n",
    "          shuffle=True,\n",
    "          validation_data=(X_valid, y_valid),\n",
    "          callbacks=[checkpointer, earlystopper])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "455024/455024 [==============================] - 72s 158us/step\n",
      "[0.06152647561230429, -0.30007137424549335]\n"
     ]
    }
   ],
   "source": [
    "# Swap the last two axes of the test inputs before they are fed to the model.\n",
    "X_test = np.transpose(testmat['testxdata'], axes=(0, 2, 1))\n",
    "y_test = testmat['testdata']\n",
    "\n",
    "# Returns the loss followed by the compiled metric(s), here [loss, cosine].\n",
    "tresults = model.evaluate(X_test, y_test)\n",
    "\n",
    "# print() call form works on both Python 2 and Python 3.\n",
    "print(tresults)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Score the test set, then binarise the scores in place at a 0.5 cut-off.\n",
    "test_inputs = np.transpose(testmat['testxdata'], axes=(0, 2, 1))\n",
    "preds = model.predict(test_inputs)\n",
    "\n",
    "# The two masks are disjoint and the values written (0 and 1) stay on their own\n",
    "# side of the threshold, so the order of these assignments does not matter.\n",
    "preds[preds < 0.5] = 0\n",
    "preds[preds >= 0.5] = 1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Collapse ground truth and predictions to 1-D copies so that every entry is\n",
    "# scored as one binary decision.\n",
    "# Assumes both arrays have the same shape so the entries line up -- TODO confirm.\n",
    "test_flat = testmat['testdata'].flatten()\n",
    "preds_flat = preds.flatten()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.9812509859695882\n",
      "[[409043292    529472]\n",
      " [  7310748   1283544]]\n"
     ]
    }
   ],
   "source": [
    "from sklearn.metrics import accuracy_score, confusion_matrix\n",
    "\n",
    "# Fraction of flattened entries where the thresholded prediction equals the label.\n",
    "# NOTE(review): in the recorded run about 98% of entries are negative, so\n",
    "# accuracy alone says little about how well positives are recovered.\n",
    "overall_accuracy = accuracy_score(test_flat, preds_flat)\n",
    "print(overall_accuracy)\n",
    "\n",
    "# Rows are true classes, columns are predicted classes.\n",
    "label_confusion = confusion_matrix(test_flat, preds_flat)\n",
    "print(label_confusion)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
